Source Code of org.terrier.structures.indexing.BlockFieldLexiconMap

/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org/
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is BlockFieldLexiconMap.java
 *
 * The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original contributor)
 */
package org.terrier.structures.indexing;


import gnu.trove.TObjectIntHashMap;
import gnu.trove.TObjectIntProcedure;


import java.io.IOException;
import java.util.Arrays;


import org.terrier.structures.BlockFieldLexiconEntry;
import org.terrier.structures.LexiconOutputStream;
import org.terrier.utility.TermCodes;
/** BlockFieldLexiconMap class */
public class BlockFieldLexiconMap extends BlockLexiconMap {
  protected final int fieldCount;
  protected final TObjectIntHashMap<String>field_tfs[]; 
  /**
   * Constructs an instance of the BlockFieldLexiconMap.
   * @param _fieldCount
   */
  @SuppressWarnings("unchecked")
  public BlockFieldLexiconMap(int _fieldCount)
  {
    super();
    fieldCount = _fieldCount;
    field_tfs = new TObjectIntHashMap[fieldCount];
    for(int fi=0;fi<fieldCount;fi++)
      field_tfs[fi] = new TObjectIntHashMap<String>(BUNDLE_AVG_UNIQUE_TERMS);
  }
  
  protected int[] getFieldFrequency(String term)
  {
    int[] fieldFrequencies = new int[fieldCount];
    for(int fi=0;fi<fieldCount;fi++)
      fieldFrequencies[fi] = field_tfs[fi].get(term);
    return fieldFrequencies;
  }
  
  /** Inserts all the terms from a document posting
    * into the lexicon map
    * @param _doc The postinglist for that document. Assumed to be of type BlockFieldDocumentPostingList.
    */
  public void insert(DocumentPostingList _doc)
  {
    super.insert(_doc);
    BlockFieldDocumentPostingList doc = (BlockFieldDocumentPostingList)_doc;
    int fi = 0;
    for(TObjectIntHashMap<String> docField : doc.field_occurrences)
    {
      final TObjectIntHashMap<String> thisField = field_tfs[fi];
      docField.forEachEntry(new TObjectIntProcedure<String>() {
        public boolean execute(String arg0, int arg1) {
          thisField.adjustOrPutValue(arg0, arg1, arg1);
          return true;
        }
      });
      fi++;
    }
  }
  
  /** Stores the lexicon tree to a lexicon stream as a sequence of entries.
    * The binary tree is traversed in order, by called the method
    * traverseAndStoreToStream.
    * @param lexiconStream The lexicon output stream to store to. */
  public void storeToStream(LexiconOutputStream<String> lexiconStream) throws IOException
  {
    final String[] terms = tfs.keys(new String[0]);
    Arrays.sort(terms);
    for (String t : terms)
    {
      BlockFieldLexiconEntry fle = new BlockFieldLexiconEntry(getFieldFrequency(t), blockFreqs.get(t));
      fle.setOffset((long)0, (byte)0);
      fle.setTermId(TermCodes.getCode(t));
      fle.setStatistics(nts.get(t), tfs.get(t));  
      lexiconStream.writeNextEntry(t, fle);
    }
  }


  @Override
  public void clear() {
    super.clear();
    for(int fi=0;fi<fieldCount;fi++)
      field_tfs[fi].clear();
  }
}
Source Code of org.terrier.structures.indexing.BlockFieldLexiconMap

Related Classes of org.terrier.structures.indexing.BlockFieldLexiconMap